library("tidyverse")
## ── Attaching packages ─────────────────────────────────────────── tidyverse 1.3.0 ──
## ✓ ggplot2 3.3.2     ✓ purrr   0.3.4
## ✓ tibble  3.0.3     ✓ dplyr   1.0.2
## ✓ tidyr   1.1.2     ✓ stringr 1.4.0
## ✓ readr   1.3.1     ✓ forcats 0.5.0
## ── Conflicts ────────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag()    masks stats::lag()
library(maps)
## 
## Attaching package: 'maps'
## The following object is masked from 'package:purrr':
## 
##     map
library(mapdata)
library(lubridate)
## 
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
## 
##     date, intersect, setdiff, union
library(viridis)
## Loading required package: viridisLite
library(wesanderson)
library(plotly)
## 
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
## 
##     last_plot
## The following object is masked from 'package:stats':
## 
##     filter
## The following object is masked from 'package:graphics':
## 
##     layout
library(RColorBrewer)
# To display only colorblind-friendly brewer palettes, specify the option colorblindFriendly = TRUE as follow:
# display.brewer.all(colorblindFriendly = TRUE)
# loading data

# World-wide daily report for 2 April 2020 from the CSSEGISandData COVID-19
# repository. `Long_` is renamed to `Long` so the plots can refer to Long/Lat.
daily_report <- rename(
  read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv")),
  Long = Long_
)
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
# graphing data

# Bubble map of confirmed cases for every reported location, drawn on a
# world basemap. Point size is mapped to Confirmed / 1000 (thousands of cases).
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed / 1000)) +
  borders("world", colour = NA, fill = "grey90") +
  theme_bw() +
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  labs(
    title = "World COVID-19 Confirmed cases", x = "", y = "",
    # The legend title previously ended in "))": one bracket too many.
    size = "Cases (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)
## Warning: Removed 54 rows containing missing values (geom_point).

# load us data

# Daily report for 5 April 2020, restricted to US rows. The listed
# Province_State values are excluded, and only rows with a positive latitude
# are kept (this also drops rows whose latitude is missing).
daily_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-05-2020.csv")) %>%
  rename(Long = Long_) %>%
  filter(
    Country_Region == "US",
    !(Province_State %in% c(
      "Alaska", "Hawaii", "American Samoa",
      "Puerto Rico", "Northern Mariana Islands",
      "Virgin Islands", "Recovered", "Guam", "Grand Princess",
      "District of Columbia", "Diamond Princess"
    )),
    Lat > 0
  )
## Parsed with column specification:
## cols(
##   FIPS = col_character(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
# graph us data

# Bubble map of confirmed cases per US location over the state outlines.
# Point size is mapped to Confirmed / 1000 (thousands of cases).
ggplot(daily_report, aes(x = Long, y = Lat, size = Confirmed / 1000)) +
  borders("state", colour = "black", fill = "grey90") +
  theme_bw() +
  geom_point(shape = 21, color = "purple", fill = "purple", alpha = 0.5) +
  labs(
    title = "COVID-19 Confirmed Cases in the US", x = "", y = "",
    # The legend title previously ended in "))": one bracket too many.
    size = "Cases (x1000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)

# Legend breaks for the log-scaled size/colour legends and the bin labels
# shown for them. The last break used to be a second 10000, which contradicted
# its "50,000+" label; it is now 50000, and the preceding label ends at 49,999
# so the two top bins no longer overlap.
mybreaks <- c(1, 100, 1000, 10000, 50000)
mylabels <- c("1-99", "100-999", "1,000-9,999", "10,000-49,999", "50,000+")

# Rows with zero confirmed cases cannot be placed on a log scale (log(0) is
# -Inf); they are dropped up front instead of being discarded by ggplot2 with
# "infinite values" / "NaNs produced" warnings.
ggplot(
  filter(daily_report, Confirmed > 0),
  aes(x = Long, y = Lat, size = Confirmed)
) +
  borders("state", colour = "white", fill = "grey90") +
  # stroke is a numeric width; 0 is what the former `F` was coerced to.
  geom_point(aes(color = Confirmed), stroke = 0, alpha = 0.7) +
  scale_size_continuous(
    name = "Cases", trans = "log", range = c(1, 7),
    breaks = mybreaks, labels = mylabels
  ) +
  scale_color_viridis_c(
    option = "viridis", name = "Cases",
    trans = "log", breaks = mybreaks, labels = mylabels
  ) +
  # Cleaning up the graph
  theme_void() +
  # Draw the colour scale as a legend so it shares name, breaks and labels
  # with the size scale.
  guides(colour = guide_legend()) +
  labs(title = "Anisa Dhana's layout for COVID-19 Confirmed Cases in the US") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  coord_fixed(ratio = 1.5)
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 40 rows containing missing values (geom_point).

# loading world data, filtering to us

# Confirmed totals per US state from the 2 April 2020 report. State names are
# lower-cased so they can be joined to the `region` column of the map data.
daily_report <- local({
  raw_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv"))
  us_rows <- filter(rename(raw_report, Long = Long_), Country_Region == "US")
  state_totals <- summarise(
    group_by(us_rows, Province_State),
    Confirmed = sum(Confirmed)
  )
  mutate(state_totals, Province_State = tolower(Province_State))
})
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# load the US map data

# State outlines from the maps package.
us <- map_data("state")

# Attach each state's confirmed total to the rows of its outline; the join
# matches the map's `region` column against the lower-cased Province_State.
state_join <- left_join(us, daily_report, by = c("region" = "Province_State"))

# Choropleth of confirmed cases per state on a log10 colour scale.
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # Add data layer
  geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
  scale_fill_gradientn(
    colours = wes_palette("Zissou1", 100, type = "continuous"),
    trans = "log10"
  ) +
  # The title previously ended with a stray apostrophe.
  labs(title = "COVID-19 Confirmed Cases in the US")

# Get and format the covid report data
# Confirmed totals keyed by "<county>.<state>" (lower case), the same key shape
# that is built from map_data("county") further down.
# NOTE(review): the object name says 27 March, but the URL below is the
# 04-02-2020 report. The name is kept because the join further down uses it.
report_03_27_2020 <- local({
  raw_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv"))
  keyed <- unite(rename(raw_report, Long = Long_), Key, Admin2, Province_State, sep = ".")
  key_totals <- summarise(group_by(keyed, Key), Confirmed = sum(Confirmed))
  mutate(key_totals, Key = tolower(Key))
})
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# dim(report_03_27_2020)

# get and format the map data

# State outlines, plus county outlines with a "<county>.<state>" key that
# matches the Key column of the report.
us <- map_data("state")
counties <- map_data("county") %>%
  unite(Key, subregion, region, sep = ".", remove = FALSE)

# Join the 2 tibbles; counties without a matching report row get NA counts.
state_join <- left_join(counties, report_03_27_2020, by = c("Key"))

# sum(is.na(state_join$Confirmed))

# County choropleth on a log10 colour scale.
ggplot(data = us, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # County fill goes first ...
  geom_polygon(data = state_join, aes(fill = Confirmed)) +
  # ... and the state outlines are drawn on top. They used to be added before
  # the filled county polygons, which painted over them.
  borders("state", colour = "black") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "PuRd"),
    breaks = c(1, 10, 100, 1000, 10000, 100000),
    trans = "log10", na.value = "White"
  ) +
  ggtitle("Number of Confirmed Cases by US County") +
  theme_bw()
## Warning: Transformation introduced infinite values in discrete y-axis

# load data

# Confirmed totals per Massachusetts county (Admin2) from the 2 April 2020
# report; county names are lower-cased to match the map data's `subregion`.
daily_report <- local({
  raw_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/04-02-2020.csv"))
  ma_rows <- filter(
    rename(raw_report, Long = Long_),
    Province_State == "Massachusetts"
  )
  county_totals <- summarise(group_by(ma_rows, Admin2), Confirmed = sum(Confirmed))
  mutate(county_totals, Admin2 = tolower(Admin2))
})
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_character(),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# state data

# state data
us <- map_data("state")

# Massachusetts data
ma_us <- subset(us, region == "massachusetts")

# Counties within MA
counties <- map_data("county")
ma_county <- subset(counties, region == "massachusetts")

# joining tables: county outlines matched to county totals by lower-case name
state_join <- left_join(ma_county, daily_report, by = c("subregion" = "Admin2"))

# plot state map: county choropleth on a log10 colour scale
ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # Add data layer
  geom_polygon(data = state_join, aes(fill = Confirmed), color = "white") +
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "BuGn"),
    trans = "log10"
  ) +
  # The title previously ended with a stray apostrophe.
  labs(title = "COVID-19 Confirmed Cases in Massachusetts")

# create interactive graph with plotly with MA data

# Interactive version of the Massachusetts county map. plotly is already
# attached with the other packages at the top of the script, so it is not
# loaded a second time here.
ggplotly(
  ggplot(data = ma_county, mapping = aes(x = long, y = lat, group = group)) +
    coord_fixed(1.3) +
    # Add data layer
    geom_polygon(data = state_join, aes(fill = Confirmed), color = "black") +
    scale_fill_gradientn(
      colours = wes_palette("Zissou1", 100, type = "continuous")
    ) +
    ggtitle("COVID-19 Cases in MA") +
    # Cleaning up the graph: no axis titles, ticks, tick labels or panel frame
    labs(x = NULL, y = NULL) +
    theme(
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank(),
      axis.text = element_blank()
    )
)
## Warning: `group_by_()` is deprecated as of dplyr 0.7.0.
## Please use `group_by()` instead.
## See vignette('programming') for more help
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_warnings()` to see where this warning was generated.

Exercises

### For the above graph “World COVID-19 Confirmed case” summarize the counts for each Country on the graph and update the graph to 9/26/2020. You may need to adjust the size of the points.

### load data from 9.26.20
# Exercise 1 asks for the counts to be summarised per country, so the report
# is collapsed to one row per Country_Region: confirmed cases are summed and
# the point is placed at the mean of the country's reported coordinates.
# Rows without coordinates still count towards the total; they are only left
# out of the position average. The plot needs Long, Lat and Confirmed only.
# NOTE(review): the unweighted mean position can land away from the mainland
# for countries that report distant territories as separate rows.
daily_report_ex1 <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")) %>%
  rename(Long = "Long_") %>%
  group_by(Country_Region) %>%
  summarize(
    Lat = mean(Lat, na.rm = TRUE),
    Long = mean(Long, na.rm = TRUE),
    Confirmed = sum(Confirmed, na.rm = TRUE),
    .groups = "drop"
  )
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
### we adjust the confirmed cases by dividing by 1 million

# World bubble map for 26 September 2020. Point size is mapped to
# Confirmed / 1000000 (millions of cases) to keep the points readable.
ggplot(daily_report_ex1, aes(x = Long, y = Lat, size = Confirmed / 1000000)) +
  borders("world", colour = NA, fill = "grey90") +
  theme_bw() +
  geom_point(shape = 21, color = "blue", fill = "red", alpha = 0.5) +
  labs(
    title = "World COVID-19 Confirmed cases", x = "", y = "",
    # The legend title previously ended in "))": one bracket too many.
    size = "Cases (x1000000)"
  ) +
  theme(legend.position = "right") +
  coord_fixed(ratio = 1.5)
## Warning: Removed 81 rows containing missing values (geom_point).

Exercise 2

### Update Anisa Dhana’s graph layout of the US to 9/26/2020. You may need to adjust the size of the points.

### load us data from 9.26.20

# Daily report for 26 September 2020, restricted to US rows. The listed
# Province_State values are excluded and only rows with a positive latitude
# are kept.
daily_report_ex2 <- local({
  excluded_states <- c(
    "Alaska", "Hawaii", "American Samoa",
    "Puerto Rico", "Northern Mariana Islands",
    "Virgin Islands", "Recovered", "Guam", "Grand Princess",
    "District of Columbia", "Diamond Princess"
  )
  raw_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv"))
  filter(
    rename(raw_report, Long = Long_),
    Country_Region == "US",
    !(Province_State %in% excluded_states),
    Lat > 0
  )
})
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
# Legend breaks for the log-scaled size/colour legends and the bin labels
# shown for them, defined once so both scales stay in step.
mybreaks <- c(100, 1000, 10000, 100000, 1000000)
mylabels <- c(
  "100-999", "1,000-9,999", "10,000-99,999",
  "100,000-999,999", "1,000,000-9,999,999"
)

# Rows with zero confirmed cases cannot be placed on a log scale (log(0) is
# -Inf); they are dropped up front instead of being discarded by ggplot2 with
# "infinite values" / "NaNs produced" warnings.
ggplot(
  filter(daily_report_ex2, Confirmed > 0),
  aes(x = Long, y = Lat, size = Confirmed)
) +
  borders("state", colour = "white", fill = "grey90") +
  # stroke is a numeric width; 0 is what the former `F` was coerced to.
  geom_point(aes(color = Confirmed), stroke = 0, alpha = 0.7) +
  scale_size_continuous(
    name = "Cases", trans = "log", range = c(1, 7),
    breaks = mybreaks, labels = mylabels
  ) +
  scale_color_viridis_c(
    option = "viridis", name = "Cases",
    trans = "log", breaks = mybreaks, labels = mylabels
  ) +
  # Cleaning up the graph
  theme_void() +
  # Draw the colour scale as a legend so it shares name, breaks and labels
  # with the size scale.
  guides(colour = guide_legend()) +
  # "lagout" in the title was a typo for "layout".
  labs(title = "Anisa Dhana's layout for COVID-19 Confirmed Cases in the US - 9/26/20") +
  theme(
    legend.position = "bottom",
    text = element_text(color = "#22211d"),
    plot.background = element_rect(fill = "#ffffff", color = NA),
    panel.background = element_rect(fill = "#ffffff", color = NA),
    legend.background = element_rect(fill = "#ffffff", color = NA)
  ) +
  coord_fixed(ratio = 1.5)
## Warning: Transformation introduced infinite values in discrete y-axis

## Warning: Transformation introduced infinite values in discrete y-axis
## Warning in sqrt(x): NaNs produced
## Warning: Removed 6 rows containing missing values (geom_point).

Exercise 3

### Update the above graph “Number of Confirmed Cases by US County” to 9/26/2020 and use a different color scheme or theme

# Get and format the covid report data
# Confirmed totals from the 26 September 2020 report, keyed by
# "<county>.<state>" in lower case so they can be joined to the county map.
report_09_26_2020 <- mutate(
  summarise(
    group_by(
      unite(
        rename(
          read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv")),
          Long = Long_
        ),
        Key, Admin2, Province_State,
        sep = "."
      ),
      Key
    ),
    Confirmed = sum(Confirmed)
  ),
  Key = tolower(Key)
)
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# dim(report_09_26_2020)

# get and format the map data

# State outlines, plus county outlines with a "<county>.<state>" key that
# matches the Key column of the report.
us_ex3 <- map_data("state")

counties_ex3 <- map_data("county") %>%
  unite(Key, subregion, region, sep = ".", remove = FALSE)

# Join the 2 tibbles; counties without a matching report row get NA counts.
state_join_ex3 <- left_join(counties_ex3, report_09_26_2020, by = c("Key"))

# sum(is.na(state_join_ex3$Confirmed))

# County choropleth on a log10 colour scale.
ggplot(data = us_ex3, mapping = aes(x = long, y = lat, group = group)) +
  coord_fixed(1.3) +
  # County fill goes first ...
  geom_polygon(data = state_join_ex3, aes(fill = Confirmed)) +
  # ... and the state outlines are drawn on top. They used to be added before
  # the filled county polygons, which painted over them.
  borders("state", colour = "black") +
  # "Set2" is a qualitative palette; interpolating it into a gradient gives
  # colours with no low-to-high order. A sequential palette (still different
  # from the "PuRd" used for the April map) keeps the scale readable.
  scale_fill_gradientn(
    colors = brewer.pal(n = 5, name = "YlOrRd"),
    breaks = c(1, 10, 100, 1000, 10000, 100000),
    trans = "log10", na.value = "White"
  ) +
  ggtitle("Number of Confirmed Cases by US County") +
  theme_bw()
## Warning: Transformation introduced infinite values in discrete y-axis

Exercise 4

### Make an interactive plot using a state of your choosing using a theme different from used in the above examples.

### Use Florida data

# Confirmed totals per Florida county (Admin2) from the 26 September 2020
# report; county names are lower-cased to match the map data's `subregion`.
daily_report_ex4 <- local({
  raw_report <- read_csv(url("https://raw.githubusercontent.com/CSSEGISandData/COVID-19/master/csse_covid_19_data/csse_covid_19_daily_reports/09-26-2020.csv"))
  fl_rows <- filter(rename(raw_report, Long = Long_), Province_State == "Florida")
  county_totals <- summarise(group_by(fl_rows, Admin2), Confirmed = sum(Confirmed))
  mutate(county_totals, Admin2 = tolower(Admin2))
})
## Parsed with column specification:
## cols(
##   FIPS = col_double(),
##   Admin2 = col_character(),
##   Province_State = col_character(),
##   Country_Region = col_character(),
##   Last_Update = col_datetime(format = ""),
##   Lat = col_double(),
##   Long_ = col_double(),
##   Confirmed = col_double(),
##   Deaths = col_double(),
##   Recovered = col_double(),
##   Active = col_double(),
##   Combined_Key = col_character(),
##   Incidence_Rate = col_double(),
##   `Case-Fatality_Ratio` = col_double()
## )
## `summarise()` ungrouping output (override with `.groups` argument)
# State-level outlines and the Florida subset of them.
us_ex4 <- map_data("state")
fl_us <- subset(us_ex4, region == "florida")

# County-level outlines and the Florida subset of them.
counties <- map_data("county")
fl_county <- subset(counties, region == "florida")

# County outlines matched to the county totals by lower-case county name.
state_join_ex4 <- left_join(
  fl_county,
  daily_report_ex4,
  by = c("subregion" = "Admin2")
)

# Interactive county choropleth for Florida.
ggplotly(
  ggplot(data = fl_county, mapping = aes(x = long, y = lat, group = group)) +
    coord_fixed(1.3) +
    # Data layer: one filled polygon per county, outlined in black
    geom_polygon(
      data = state_join_ex4,
      mapping = aes(fill = Confirmed),
      color = "black"
    ) +
    scale_fill_gradientn(
      colours = wes_palette("FantasticFox1", 100, type = "continuous")
    ) +
    ggtitle("COVID-19 Cases in FL - 9/26/20") +
    # Strip axis titles, ticks, tick labels and the panel frame
    labs(x = NULL, y = NULL) +
    theme(
      panel.border = element_blank(),
      panel.background = element_blank(),
      axis.ticks = element_blank(),
      axis.text = element_blank()
    )
)

Exercise 5

### Create a report with static maps and interactive graphs that is meant to be read by others (e.g. your friends and family). Hide warnings, messages and even the code you used so that it is readable. Include references. Link to the Lab 10 report from your Github site. Submit the link to Moodle. animations that is meant to be read by others (e.g. your friends and family). Hide warnings, messages and even the code you used so that it is readable. Include references. Link to the Lab 6 report from your Github site. Submit the link to Moodle.